---
title: "Generating Networks of Facebook User Movement"
subtitle: "Replication Code"
author: "Timothy Fraser"
date: "February 6, 2021"
output: html_notebook
---

First, this study uses data from Facebook Data for Good, which tallies up the number of Facebook users who were identified to be in the disaster hit area consistently two weeks prior to the disaster. Then, they tally up how many people went from any neighborhood in that region to any other neighborhood. Unfortunately, this data is measured at the neighborhood level, using special polgyons. This is not a level joinable with census data, and by data sharing agreement, the researchers cannot share the original raw data without permission from Facebook. So, to remedy both these issues, we're going to aggregate everything up from the neighborhood level to the census tract level. (After that, we can easily aggregate up to the county level).

# 0. Packages

First, let's load some packages!

```{r, message = FALSE, warning = FALSE}
library(tidyverse) # for tidy data manipulation
# Using the most recent version of dplyr, we can bind_rows() for sf objects
# Download the most recent version here:
# library(remotes)
# remotes::install_github("tidyverse/dplyr")
library(dplyr)
library(dtplyr) # for dplyr functions with data.table, the speedy data manipulation package

# GIS packages
library(sf) # for tidy spatial data
library(rgdal) # for spatial operations
library(tigris) # for accessing census boundaries
library(tidycensus) # for downloading census data
library(censusapi) # for downloading census data
# Networks Packages
library(tidygraph)
library(ggraph)
#remotes::install_github("luukvdmeer/sfnetworks")
library(sfnetworks)
```

# 1. Hurricane Zeta

First, this study tabulates movement for Hurricane Zeta.

## 1.1 Geographic Data

###1.1.1 Get Polygons

First, let's use the excellent tigris package to get polygons for every level of geography we're looking at for Hurricane Zeta.

```{r}
# Import census tracts for the state of Louisiana
# Let's import census tracts for several states
get_tracts = function(STATE){
  tigris::tracts(state = STATE, cb = TRUE, year = 2019) %>%
    st_as_sf() %>%
    st_transform(CRS(paste0("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs"))) %>%
    # set all variable names to lowercase
    magrittr::set_colnames(value = names(.) %>% tolower()) %>%
    select(geoid, area_land = aland, geometry) %>%
    filter(!is.na(geoid)) %>%
    return()
}
# Import and bind census tracts for every state that Hurricane Zeta affected
c("LA", "AL", "MS", "GA", "VA",
  "KY", "NC", "SC", "TN", "FL") %>%
  map(~get_tracts(.), .id = "states") %>%
  dplyr::bind_rows() %>%
  saveRDS("shapes/tracts.rds")

# Import all zipcodes in the US 
tigris::zctas(year = 2019, cb = TRUE) %>%
  st_as_sf() %>%
  select(geoid = GEOID10, area_land = ALAND10, geometry) %>%
  st_transform(CRS(paste0("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs"))) %>%
  saveRDS("shapes/zipcodes.rds")
  
# Import all counties in the US
tigris::counties(year = 2019, cb = TRUE) %>%
  st_as_sf()  %>%
  select(geoid = GEOID, name = NAME, area_land = ALAND, geometry) %>%
  st_transform(CRS(paste0("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs"))) %>%
  saveRDS("shapes/counties.rds")

# Import all county subdivisions in the US
data.frame(state = state.abb) %>%
  split(.$state) %>%
  map(~tigris::county_subdivisions(state = .$state, year = 2019, cb = TRUE) %>%
        st_as_sf() %>%
        select(geoid = GEOID, name = NAME, area_land = ALAND, geometry) %>%
        st_transform(CRS(paste0("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs"))),
      .id = "state") %>%
  dplyr::bind_rows() %>%
  saveRDS("shapes/csub.rds")

# Import continental united states
tigris::states(cb = TRUE, year = 2019) %>%
  st_as_sf() %>%
  st_transform(CRS(paste0("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs"))) %>%
  select(state = STUSPS, name = NAME, geometry) %>%
  filter(!state %in% c("HI", "AK", "PR", "VI", "MP", "GU", "AS")) %>%
  saveRDS("shapes/states.rds")

```

### 1.1.2 Get Centroids

Now, let's give each zipcode, tract, and county a centroid.

```{r}
# For counties
read_rds("shapes/counties.rds") %>%
  # Transform to Equal Area Conic Projection, for best centroid calculation
  st_transform(crs = CRS(paste0(aea))) %>%
  # Calculate centroids for every county
  group_by(geoid, name) %>%
  summarize(geometry = st_centroid(geometry)) %>%
  ungroup() %>%
  # Transform back to WSG84
  st_transform(CRS(paste0("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs"))) %>%
  saveRDS("shapes/counties_centroid.rds")

# For county subdivisions
read_rds("shapes/csub.rds") %>%
  # Join in state names
  st_join(read_rds("shapes/states.rds")) %>%
  filter(state %in% c("LA", "AL", "MS", "GA", "VA",
  "KY", "NC", "SC", "TN", "FL", "CA")) %>%
  # Transform to Equal Area Conic Projection, for best centroid calculation
  st_transform(crs = CRS(paste0(aea))) %>%
  # Calculate centroids for every county
  group_by(geoid) %>%
  summarize(geometry = st_centroid(geometry)) %>%
  ungroup() %>%
  # Transform back to WSG84
  st_transform(CRS(paste0("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs"))) %>%
  saveRDS("shapes/csub_centroid.rds")


# For zipcodes
read_rds("shapes/zipcodes.rds") %>%
  # Join in state names
  st_join(read_rds("shapes/states.rds")) %>%
  filter(state %in% c("LA", "AL", "MS", "GA", "VA",
  "KY", "NC", "SC", "TN", "FL")) %>%
  # Transform to Equal Area Conic Projection, for best centroid calculation
  st_transform(crs = CRS(paste0(aea))) %>%
  # Calculate centroids for every county
  group_by(geoid) %>%
  summarize(geometry = st_centroid(geometry)) %>%
  ungroup() %>%
  # Transform back to WSG84
  st_transform(CRS(paste0("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs"))) %>%
  saveRDS("shapes/zipcodes_centroid.rds")

# For census tracts
read_rds("shapes/tracts.rds") %>%
  # Transform to Equal Area Conic Projection, for best centroid calculation
  st_transform(crs = CRS(paste0(aea))) %>%
  # Calculate centroids for every county
  group_by(geoid) %>%
  summarize(geometry = st_centroid(geometry)) %>%
  ungroup() %>%
  # Transform back to WSG84
  st_transform(CRS(paste0("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs"))) %>%
  saveRDS("shapes/tracts_centroid.rds")
```



## 1.2. Geolocate Neighborhoods and Edgelist

### 1.2.1 Geolocated Neighborhoods

```{r}
# Get Albers Equal Area Conic Projection
#https://spatialreference.org/ref/esri/north-america-albers-equal-area-conic/
aea <- "+proj=aea +lat_1=20 +lat_2=60 +lat_0=40 +lon_0=-96 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=m +no_defs"

# Compile Facebook data
# First, let's grab all the names of the files from Hurricane Zeta,
data.frame(file = dir("fb_data/hurricane_zeta", full.names = TRUE)) %>%
  # And now going file by file,
  split(.$file) %>%
  # Read them all in, separated as data.frames in a list,
  # and then bind the data.frames together, using a file ID
  map_dfr(~read_csv(.$file), .id = "file") %>%
  # now save!
  # Give each edge a unique ID
  mutate(id = 1:n()) %>%
  # Keep only complete observations
  filter(!is.na(n_baseline) & !is.na(n_crisis)) %>%
  # edit geometry
  mutate(geometry = geometry %>% 
           str_remove_all("[)]|[(]") %>% 
           str_remove("LINESTRING ") %>% 
           str_remove("[,]")) %>%
  # Extract longitude and latitude from geometry
  mutate(
    from_x = geometry %>% word(1),
    from_y = geometry %>% word(2),
    to_x = geometry %>% word(3),
    to_y = geometry %>% word(4)) %>%
  # Convert to numeric
  mutate_at(vars(from_x, from_y, to_x, to_y), as.numeric) %>%
 # Filter out any geocodes that didn't make it
  filter(!is.na(from_x) & !is.na(from_y) & !is.na(to_x) & !is.na(to_y)) %>%
  # Create line-springs in C really really fast
  mutate(geometry = sprintf("LINESTRING(%s %s, %s %s)", 
                            from_x, from_y, 
                            to_x, to_y)) %>%
  # Set the crs and tell the object to become sf format using the geometry column
  # For the coordinate reference system, let's use:
  # the World Geodetic System from 1984, commonly used with degrees
  # WGS 84 https://spatialreference.org/ref/epsg/wgs-84/
  st_as_sf(crs = CRS(paste0("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs")),
           wkt = "geometry") %>%
  saveRDS("fb_data/hurricane_zeta_edges_neighborhood.rds")



# Let's get points for each polygon
bind_rows(
  # Import just the origin points
  read_rds("fb_data/hurricane_zeta_edges_neighborhood.rds") %>%
    as.data.frame() %>%
    select(polygon_id = start_polygon_id, polygon_name = start_polygon_name,
           x = from_x, y = from_y),
  # Import just the destination points
  read_rds("fb_data/hurricane_zeta_edges_neighborhood.rds") %>%
    as.data.frame() %>%
    select(polygon_id = end_polygon_id, polygon_name = end_polygon_name,
           x = to_x, y = to_y)) %>%
  # Remove duplicates
  # Take every polygon_id,
  # shorted each x and y coordinate to 5 decimal places
  # (so that we don't have duplicative points)
  # Some polygons have the same ID but different coordinates;
  # but upon inspection, they are NEARLY identical coordinates;
  # Think: about a 100 meter difference.
  # For each polygon, we're going to grab just ONE coordinate each, 
  # rounded to 5 decimal places
  group_by(polygon_id) %>%
  # For each polygon id, grab the first x coordinate of its kind and round to 5
  summarize(x = round(x[1], 5),
            y = round(y[1], 5)) %>%
  ungroup() %>%
  distinct() %>%
  # Create points in C really really fast
  mutate(geometry = sprintf("POINT(%s %s)", x, y)) %>%
  # Set the crs and tell the object to become sf format using the geometry column
  st_as_sf(crs = CRS(paste0("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs")),
           wkt = "geometry") %>%
  # Now, let's join in the census county subdivision of each point
  st_transform(crs = CRS(paste0(aea))) %>%
  # Let's join in the census county subdivision this neighborhood sits in
  # Zooming in just to Louisiana
  st_join(read_rds("shapes/csub.rds") %>% 
            select(csub = geoid, geometry) %>%
            filter(str_sub(csub,1,2) == "22") %>%
            st_transform(crs = CRS(paste0(aea))),
          # Return just the geometry with the largest overlap
          largest = TRUE) %>%
  # Now keep just the points in Louisiana county subdivisions
  filter(!is.na(csub)) %>%
  # Let's join in the zipcode this neighborhood sits in 
  # Transform back to WGS84 coordinate reference system, for easy interaction with edges
  st_transform(crs = CRS(paste0("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs"))) %>%
  saveRDS("fb_data/hurricane_zeta_nodes_neighborhood.rds")

```

### 1.2.2 Aggregate to County Subdivision

Now, we want to aggregate to the census county subdivision level. Unfortunately, our files are so large that it's quite computationally expensive to do the necessary steps. Instead, we're going to zoom into a single state to do it, looking at Southeastern Louisiana.

```{r}
# Obtain state polygon for Louisiana
states <- read_rds("shapes/states.rds") %>%
  filter(state == "LA")

# Obtain IDs of neighborhoods located in Southeastern Louisiana
read_rds("fb_data/hurricane_zeta_nodes_neighborhood.rds") %>%
  st_join(states) %>%
  as_tibble() %>%
  filter(state == "LA") %>%
  select(polygon_id) %>%
  unlist() %>%
  saveRDS("fb_data/hurricane_zeta_ids_neighborhood.rds")
remove(state)

```

```{r}
ids <- read_rds("fb_data/hurricane_zeta_ids_neighborhood.rds")

# Now, let's extract just the edges within Southeastern Louisiana
edgelist <- read_rds("fb_data/hurricane_zeta_edges_neighborhood.rds") %>% 
  as_tibble() %>%
  # Grab key fields
  select(start_polygon_id, end_polygon_id, 
         n_crisis, n_baseline, date_time) %>%
    # Filter into just Louisiana Neighborhoods
  filter(start_polygon_id %in% ids & end_polygon_id %in% ids) 


# Import neighborhoods (originally 500,000 plus, now just 18370 in LA)
neighborhoods <- read_rds("fb_data/hurricane_zeta_nodes_neighborhood.rds") %>%
  as_tibble() %>%
  # Filter into just Louisiana Neighborhoods
  filter(polygon_id %in% ids) %>%
  select(polygon_id, csub)  %>%
  distinct()

# Get centroids as points, which we can easily join together
centroids <- read_rds("shapes/csub_centroid.rds") %>%
  filter(str_sub(geoid, 1,2 ) == "22") %>%
  mutate(x = st_coordinates(geometry)[,1],
         y = st_coordinates(geometry)[,2]) %>%
  as.data.frame() %>% 
  select(-geometry) %>%
  distinct()

# Aggregate edgelist from neighborhood to census subdivision level
edgelist %>%
  # First, let's join into the edgelist the census subdivision of the origin neighborhood
  left_join(by = c("start_polygon_id" = "polygon_id"),
            y = neighborhoods %>%
              rename(from = csub)) %>%
  # Now let's join into the edgelist the census subdivision of the destination neighborhood
  left_join(by = c("end_polygon_id" = "polygon_id"),
            y = neighborhoods %>%
              rename(to = csub)) %>%
  # Aggregate from neighborhood to census subdivision
  # Adding together all people who moved between neighborhoods for this tract
  group_by(from, to, date_time) %>%
  summarize(n_crisis = sum(n_crisis, na.rm = TRUE),
            n_baseline = sum(n_baseline, na.rm = TRUE)) %>%
    ungroup() %>%
  filter(!is.na(from), !is.na(to), !is.na(date_time), 
         !is.na(n_crisis), !is.na(n_baseline)) %>%
  # Next, let's join in the origin subdivision centroid points
  left_join(by = c("from" = "geoid"),
            y = centroids %>%
              rename(from_x = x,
                     from_y = y)) %>%
  # Next, let's join in the destination subdivision centroid points
  left_join(by = c("to" = "geoid"),
            y = centroids %>%
              rename(to_x = x,
                     to_y = y)) %>%
  # Filter out any geocodes that didn't make it
  filter(!is.na(from_x) & !is.na(from_y) & !is.na(to_x) & !is.na(to_y))  %>% 
  # Create line-springs in C really really fast
  mutate(geometry = sprintf("LINESTRING(%s %s, %s %s)", 
                            from_x, from_y, 
                            to_x, to_y)) %>%
  # Set the crs and tell the object to become sf format using the geometry column
  # For the coordinate reference system, let's use:
  # the World Geodetic System from 1984, commonly used with degrees
  # WGS 84 https://spatialreference.org/ref/epsg/wgs-84/
  st_as_sf(crs = CRS(paste0("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs")),
           wkt = "geometry") %>%
  # Finally, let's tally evacuation
  # If crisis movement was greater than baseline movement, record it as evacuation;
  # If crisis movement was lower than baseline movement, leave it 0;
  # We don't really want to capture negative evacuation.
  mutate(evacuation = n_crisis - n_baseline) %>%
  select(from, to, date_time, evacuation, geometry) %>%
  saveRDS("raw_data/hurricane_zeta_edges_csub.rds")
```



```{r}
# Let's identify all the counties in our Southeastern Louisiana Sample
read_rds("fb_data/hurricane_zeta_nodes_neighborhood.rds")$csub %>%
  str_sub(1,5) %>% unique() %>%
  saveRDS("raw_data/hurricane_zeta_counties.rds")
```


# 2. Glass Fire in Napa and Sonoma Counties

Next, we'll tally movement for a major fire in 2020 in California.

## 2.1 Geographic Data

### 2.1.1 Get Polygons

First, let's use the excellent tigris package to get polygons for every level of geography we're looking at for Hurricane Zeta.

```{r}
# Import census tracts for the state of Louisiana
# Let's import census tracts for several states
get_tracts = function(STATE){
  tigris::tracts(state = STATE, cb = TRUE, year = 2019) %>%
    st_as_sf() %>%
    st_transform(CRS(paste0("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs"))) %>%
    # set all variable names to lowercase
    magrittr::set_colnames(value = names(.) %>% tolower()) %>%
    select(geoid, area_land = aland, geometry) %>%
    filter(!is.na(geoid)) %>%
    return()
}
# Import and bind census tracts for every state that Hurricane Zeta affected
c("CA") %>%
  map(~get_tracts(.), .id = "states") %>%
  dplyr::bind_rows() %>%
  saveRDS("shapes/ca_tracts.rds")

# Import all county subdivisions in the US
# (copied from above)
data.frame(state = state.abb) %>%
  split(.$state) %>%
  map(~tigris::county_subdivisions(state = .$state, year = 2019, cb = TRUE) %>%
        st_as_sf() %>%
        select(geoid = GEOID, name = NAME, area_land = ALAND, geometry) %>%
        st_transform(CRS(paste0("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs"))),
      .id = "state") %>%
  dplyr::bind_rows() %>%
  saveRDS("shapes/csub.rds")

# Import all zipcodes in the US 
tigris::zctas(year = 2019, cb = TRUE) %>%
  st_as_sf() %>%
  select(geoid = GEOID10, area_land = ALAND10, geometry) %>%
  st_transform(CRS(paste0("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs"))) %>%
  saveRDS("shapes/zipcodes.rds")
  
# Import all counties in the US
tigris::counties(year = 2019, cb = TRUE) %>%
  st_as_sf()  %>%
  select(geoid = GEOID, name = NAME, area_land = ALAND, geometry) %>%
  st_transform(CRS(paste0("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs"))) %>%
  saveRDS("shapes/counties.rds")

# Import continental united states
tigris::states(cb = TRUE, year = 2019) %>%
  st_as_sf() %>%
  st_transform(CRS(paste0("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs"))) %>%
  select(state = STUSPS, name = NAME, geometry) %>%
  filter(!state %in% c("HI", "AK", "PR", "VI", "MP", "GU", "AS")) %>%
  saveRDS("shapes/states.rds")

```


### 2.1.2 Get Centroids

Now, let's give each zipcode, tract, and county a centroid.

```{r}
# Get Albers Equal Area Conic Projection
#https://spatialreference.org/ref/esri/north-america-albers-equal-area-conic/
aea <- "+proj=aea +lat_1=20 +lat_2=60 +lat_0=40 +lon_0=-96 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=m +no_defs"

# For counties
read_rds("shapes/counties.rds") %>%
  # Transform to Equal Area Conic Projection, for best centroid calculation
  st_transform(crs = CRS(paste0(aea))) %>%
  # Calculate centroids for every county
  group_by(geoid, name) %>%
  summarize(geometry = st_centroid(geometry)) %>%
  ungroup() %>%
  # Transform back to WSG84
  st_transform(CRS(paste0("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs"))) %>%
  saveRDS("shapes/counties_centroid.rds")

# For county subdivisions (# copied from above)
read_rds("shapes/csub.rds") %>%
  # Join in state names
  st_join(read_rds("shapes/states.rds")) %>%
  filter(state %in% c("LA", "AL", "MS", "GA", "VA",
  "KY", "NC", "SC", "TN", "FL", "CA")) %>%
  # Transform to Equal Area Conic Projection, for best centroid calculation
  st_transform(crs = CRS(paste0(aea))) %>%
  # Calculate centroids for every county
  group_by(geoid) %>%
  summarize(geometry = st_centroid(geometry)) %>%
  ungroup() %>%
  # Transform back to WSG84
  st_transform(CRS(paste0("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs"))) %>%
  saveRDS("shapes/csub_centroid.rds")

# For zipcodes
read_rds("shapes/zipcodes.rds") %>%
  # Join in state names
  st_join(read_rds("shapes/states.rds")) %>%
  filter(state %in% c("CA", "OR", "NV", "AZ")) %>%
  # Transform to Equal Area Conic Projection, for best centroid calculation
  st_transform(crs = CRS(paste0(aea))) %>%
  # Calculate centroids for every county
  group_by(geoid) %>%
  summarize(geometry = st_centroid(geometry, of_largest_polygon = TRUE)) %>%
  ungroup() %>%
  # Transform back to WSG84
  st_transform(CRS(paste0("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs"))) %>%
  saveRDS("shapes/ca_zipcodes_centroid.rds")

# For census tracts
read_rds("shapes/ca_tracts.rds") %>%
  # Transform to Equal Area Conic Projection, for best centroid calculation
  st_transform(crs = CRS(paste0(aea))) %>%
  # Calculate centroids for every county
  group_by(geoid) %>%
  summarize(geometry = st_centroid(geometry)) %>%
  ungroup() %>%
  # Transform back to WSG84
  st_transform(CRS(paste0("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs"))) %>%
  saveRDS("shapes/ca_tracts_centroid.rds")
```

## 2.2. Geolocate Neighborhoods and Edgelist

### 2.2.1 Geolocated Neighborhoods

```{r}
# Get Albers Equal Area Conic Projection
#https://spatialreference.org/ref/esri/north-america-albers-equal-area-conic/
aea <- "+proj=aea +lat_1=20 +lat_2=60 +lat_0=40 +lon_0=-96 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=m +no_defs"

# Compile Facebook data
# First, let's grab all the names of the files from Hurricane Zeta,
data.frame(file = dir("fb_data/glass_fire", full.names = TRUE)) %>%
  # And now going file by file,
  split(.$file) %>%
  # Read them all in, separated as data.frames in a list,
  # and then bind the data.frames together, using a file ID
  map_dfr(~read_csv(.$file), .id = "file") %>%
  # now save!
  # Give each edge a unique ID
  mutate(id = 1:n()) %>%
  # Keep only complete observations
  filter(!is.na(n_baseline) & !is.na(n_crisis)) %>%
  # edit geometry
  mutate(geometry = geometry %>% 
           str_remove_all("[)]|[(]") %>% 
           str_remove("LINESTRING ") %>% 
           str_remove("[,]")) %>%
  # Extract longitude and latitude from geometry
  mutate(
    from_x = geometry %>% word(1),
    from_y = geometry %>% word(2),
    to_x = geometry %>% word(3),
    to_y = geometry %>% word(4)) %>%
  # Convert to numeric
  mutate_at(vars(from_x, from_y, to_x, to_y), as.numeric) %>%
 # Filter out any geocodes that didn't make it
  filter(!is.na(from_x) & !is.na(from_y) & !is.na(to_x) & !is.na(to_y)) %>%
  # Create line-springs in C really really fast
  mutate(geometry = sprintf("LINESTRING(%s %s, %s %s)", 
                            from_x, from_y, 
                            to_x, to_y)) %>%
  # Set the crs and tell the object to become sf format using the geometry column
  # For the coordinate reference system, let's use:
  # the World Geodetic System from 1984, commonly used with degrees
  # WGS 84 https://spatialreference.org/ref/epsg/wgs-84/
  st_as_sf(crs = CRS(paste0("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs")),
           wkt = "geometry") %>%
  saveRDS("fb_data/glass_fire_edges_neighborhood.rds")
```


```{r}
# Let's get points for each polygon
bind_rows(
  # Import just the origin points
  read_rds("fb_data/glass_fire_edges_neighborhood.rds") %>%
    as.data.frame() %>%
    select(polygon_id = start_polygon_id, polygon_name = start_polygon_name,
           x = from_x, y = from_y),
  # Import just the destination points
  read_rds("fb_data/glass_fire_edges_neighborhood.rds") %>%
    as.data.frame() %>%
    select(polygon_id = end_polygon_id, polygon_name = end_polygon_name,
           x = to_x, y = to_y)) %>%
  # Remove duplicates
  # Take every polygon_id,
  # shorted each x and y coordinate to 5 decimal places
  # (so that we don't have duplicative points)
  # Some polygons have the same ID but different coordinates;
  # but upon inspection, they are NEARLY identical coordinates;
  # Think: about a 100 meter difference.
  # For each polygon, we're going to grab just ONE coordinate each, 
  # rounded to 5 decimal places
  group_by(polygon_id) %>%
  # For each polygon id, grab the first x coordinate of its kind and round to 5
  summarize(x = round(x[1], 5),
            y = round(y[1], 5)) %>%
  ungroup() %>%
  distinct() %>%
  # Create points in C really really fast
  mutate(geometry = sprintf("POINT(%s %s)", x, y)) %>%
  # Set the crs and tell the object to become sf format using the geometry column
  st_as_sf(crs = CRS(paste0("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs")),
           wkt = "geometry") %>%
  # Now, let's join in the census tract and zipcode of each point
  st_transform(crs = CRS(paste0(aea))) %>%
  # Let's join in the census tract this neighborhood sits in
  # Zooming in just to the four-state area around california
  st_join(read_rds("shapes/csub.rds") %>% 
            select(csub = geoid, geometry) %>%
            filter(str_sub(csub,1,2) %in% c("06", "32", "04", "41")) %>%
            st_transform(crs = CRS(paste0(aea))),
          # Return just the geometry with the largest overlap
          largest = TRUE) %>%
  # Now keep just the points in the California tracts
  filter(!is.na(csub)) %>%
  # Transform back to WGS84 coordinate reference system, for easy interaction with edges
  st_transform(crs = CRS(paste0("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs"))) %>%
  saveRDS("fb_data/glass_fire_nodes_neighborhood.rds")
```



### 2.2.2 Aggregate to Census Tract

Now, we want to aggregate to the census tract level. Unfortunately, our files are so large that it's quite computationally expensive to do the necessary steps. Instead, we're going to zoom into a single state to do it, looking at Southeastern Louisiana.

```{r}
# Get Albers Equal Area Conic Projection
#https://spatialreference.org/ref/esri/north-america-albers-equal-area-conic/
aea <- "+proj=aea +lat_1=20 +lat_2=60 +lat_0=40 +lon_0=-96 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=m +no_defs"

# Obtain state polygon for California
states <- read_rds("shapes/states.rds") %>%
  filter(state == "CA")

# Obtain IDs of neighborhoods located in California
read_rds("fb_data/glass_fire_nodes_neighborhood.rds") %>%
  st_join(states) %>%
  as_tibble() %>%
  filter(state == "CA") %>%
  select(polygon_id) %>%
  unlist() %>%
  saveRDS("fb_data/glass_fire_ids_neighborhood.rds")
remove(state)

```

```{r}
ids <- read_rds("fb_data/glass_fire_ids_neighborhood.rds")

# Now, let's extract just the edges within Southeastern Louisiana
edgelist <- read_rds("fb_data/glass_fire_edges_neighborhood.rds") %>% 
  as_tibble() %>%
  # Grab key fields
  select(start_polygon_id, end_polygon_id, 
         n_crisis, n_baseline, date_time) %>%
    # Filter into just California Neighborhoods
  filter(start_polygon_id %in% ids & end_polygon_id %in% ids) 


# Import neighborhoods
neighborhoods <- read_rds("fb_data/glass_fire_nodes_neighborhood.rds") %>%
  as_tibble() %>%
  # Filter into just California Neighborhoods
  filter(polygon_id %in% ids) %>%
  select(polygon_id, csub)  %>%
  distinct()

# Get centroids as points, which we can easily join together
centroids <- read_rds("shapes/csub_centroid.rds") %>%
  filter(str_sub(geoid, 1,2 ) == "06") %>%
  mutate(x = st_coordinates(geometry)[,1],
         y = st_coordinates(geometry)[,2]) %>%
  as.data.frame() %>% 
  select(-geometry) %>%
  distinct()

# Aggregate edgelist from neighborhood to census tract level
edgelist %>%
  # First, let's join into the edgelist the census tract of the origin neighborhood
  left_join(by = c("start_polygon_id" = "polygon_id"),
            y = neighborhoods %>%
              rename(from = csub)) %>%
  # Now let's join into the edgelist the census tract of the destination neighborhood
  left_join(by = c("end_polygon_id" = "polygon_id"),
            y = neighborhoods %>%
              rename(to = csub)) %>%
  # Aggregate from neighborhood to census tract
  # Adding together all people who moved between neighborhoods for this tract
  group_by(from, to, date_time) %>%
  summarize(n_crisis = sum(n_crisis, na.rm = TRUE),
            n_baseline = sum(n_baseline, na.rm = TRUE)) %>%
    ungroup() %>%
  filter(!is.na(from), !is.na(to), !is.na(date_time), 
         !is.na(n_crisis), !is.na(n_baseline)) %>%
  # Next, let's join in the origin subdivision centroid points
  left_join(by = c("from" = "geoid"),
            y = centroids %>%
              rename(from_x = x,
                     from_y = y)) %>%
  # Next, let's join in the destination subdivision centroid points
  left_join(by = c("to" = "geoid"),
            y = centroids %>%
              rename(to_x = x,
                     to_y = y)) %>%
  # Filter out any geocodes that didn't make it
  filter(!is.na(from_x) & !is.na(from_y) & !is.na(to_x) & !is.na(to_y))  %>% 
  # Create line-springs in C really really fast
  mutate(geometry = sprintf("LINESTRING(%s %s, %s %s)", 
                            from_x, from_y, 
                            to_x, to_y)) %>%
  # Set the crs and tell the object to become sf format using the geometry column
  # For the coordinate reference system, let's use:
  # the World Geodetic System from 1984, commonly used with degrees
  # WGS 84 https://spatialreference.org/ref/epsg/wgs-84/
  st_as_sf(crs = CRS(paste0("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs")),
           wkt = "geometry") %>%
  # Finally, let's tally evacuation
  # If crisis movement was greater than baseline movement, record it as evacuation;
  # If crisis movement was lower than baseline movement, leave it 0;
  # We don't really want to capture negative evacuation.
  mutate(evacuation = n_crisis - n_baseline) %>%
  select(from, to, date_time, evacuation, geometry) %>%
  saveRDS("raw_data/glass_fire_edges_csub.rds")
```
```{r}
# Let's identify all the counties in our California Sample
read_rds("fb_data/glass_fire_nodes_neighborhood.rds")$csub %>%
  str_sub(1,5) %>% unique() %>%
  saveRDS("raw_data/glass_fire_counties.rds")
```


# 3. Visualize

## Hydrology

```{r, eval = FALSE}
library(tidyverse)
library(sf)
library(rgdal)
library(tigris)


# Let's identify all the counties in our Southeastern Louisiana Sample
mycounties <- read_rds("fb_data/hurricane_zeta_nodes_neighborhood.rds")$csub %>%
  str_sub(1,5) %>% unique()

# Now let's import them
counties <- read_rds("shapes/counties.rds") %>%
            filter(geoid %in% mycounties)
# Generate hydrology
tigris::area_water(state = "LA", county = str_sub(mycounties, 3,5) %>% unique(),
                            year = 2019) %>%
  st_as_sf() %>%
  st_transform(crs = wgs) %>%
  st_join(counties) %>%
  filter(!is.na(name)) %>%
  st_crop(counties) %>%
  saveRDS("shapes/la_hydro_area.rds")


# Let's identify all the counties in our Southeastern Louisiana Sample
mycounties <- read_rds("fb_data/glass_fire_nodes_neighborhood.rds")$csub %>%
  str_sub(1,5) %>% unique()

# Now let's import them
counties <- read_rds("shapes/counties.rds") %>%
            filter(geoid %in% mycounties)
# Generate hydrology
tigris::area_water(state = "CA", county = str_sub(mycounties, 3,5) %>% unique(),
                            year = 2019) %>%
  st_as_sf() %>%
  st_transform(crs = wgs) %>%
  st_join(counties) %>%
  filter(!is.na(name)) %>%
  st_crop(counties) %>%
  saveRDS("shapes/ca_hydro_area.rds")

```

## Louisiana

```{r}
library(tidyverse)
library(sf)
library(rgdal)
library(ggpubr)
#ggspatial

# Get Albers Equal Area Conic Projection
#https://spatialreference.org/ref/esri/north-america-albers-equal-area-conic/
aea <- "+proj=aea +lat_1=20 +lat_2=60 +lat_0=40 +lon_0=-96 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=m +no_defs"
# World Global System 1984
wgs <- "+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs"

# Let's identify all the counties in our Southeastern Louisiana Sample
mycounties <- read_rds("fb_data/hurricane_zeta_nodes_neighborhood.rds")$csub %>%
  str_sub(1,5) %>% unique()

# Now let's import them
counties <- read_rds("shapes/counties.rds") %>%
            filter(geoid %in% mycounties)
# Create a single backdrop layer
back <- counties %>%
  summarize(geometry = st_union(geometry))
# Import cities/county subdivisions too
cities <- read_rds("shapes/csub.rds") %>%
            filter(str_sub(geoid, 1,5) %in% mycounties)
# Import facebook neighborhoods
neighborhoods <- read_rds("fb_data/hurricane_zeta_nodes_neighborhood.rds")

# Import key destinations
points <- bind_rows(
  data.frame(latitude = 29.9511, longitude = -90.0715,
             name = "New Orleans"),
  data.frame(latitude = 30.4515, longitude = -91.1871,
             name = "Baton Rouge")) %>%
  st_as_sf(coords = c("longitude", "latitude"),
           crs = CRS(paste0("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs")))

# Import hydrology
water <- read_rds("shapes/la_hydro_area.rds") %>%
  select(HYDROID, geometry)

g1 <- ggplot() +
  # Add a nice grey border to everything
  geom_sf(data = back,
          size = 5, fill = NA, color = "darkgrey") +
  geom_sf(data = back,
          size = 5, fill = "#9DBE76", color = NA) +
  # Add hydrology
  geom_sf(data = water, color = NA, fill = "steelblue", alpha = 0.5) +
  # Add lines for each city
  geom_sf(data = cities,
          fill = NA, color = "lightgrey", size = 0.5) +
  # Add county boundaries
  geom_sf(data = counties, color = "black", size = 1, fill = NA) +
  # Add points for neighborhoods
  geom_sf(data = neighborhoods,
          color = "#785EF0", alpha = 0.75, size = 2) +
    geom_sf(data = neighborhoods,
          color = "white", alpha = 0.75, size = 1.5) +
  # Overlay key cities as labels
  geom_sf_label(data = points, mapping = aes(label = name), size = 3) +
  # Make the theme
  theme_void(base_size = 14) + 
  theme(plot.subtitle = element_text(hjust = 0.5)) +
 labs(subtitle = "Hurricane Zeta in Southeastern Louisiana\nFacebook Recorded Neighborhoods (n = 102)\nby County Subdivision (n = 176) and County (n = 21)") +
   # Add compass and scale
  ggspatial::annotation_scale(location = "bl", pad_x = unit(0.5, "in")) +
  ggspatial::annotation_north_arrow(
    which_north = "true",
    height = unit(0.25, "in"),
    width = unit(0.25, "in"),
    location = "bl")

g1 +
  ggsave("viz/la_neighborhoods_and_states.png", dpi = 300, width = 5, height = 6)
```

## California

```{r}
library(tidyverse)
library(sf)
library(rgdal)
library(ggpubr)
#ggspatial

# Get Albers Equal Area Conic Projection
#https://spatialreference.org/ref/esri/north-america-albers-equal-area-conic/
aea <- "+proj=aea +lat_1=20 +lat_2=60 +lat_0=40 +lon_0=-96 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=m +no_defs"
# World Global System 1984
wgs <- "+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs"

# Let's identify all the counties in our Southeastern Louisiana Sample
mycounties <- counties <- read_rds("fb_data/glass_fire_nodes_neighborhood.rds")$csub %>%
  str_sub(1,5) %>% unique()


# Now let's import them
counties <- read_rds("shapes/counties.rds") %>%
            filter(geoid %in% mycounties)
# Create a single backdrop layer
back <- counties %>%
  summarize(geometry = st_union(geometry))
# Import cities/county subdivisions too
cities <- read_rds("shapes/csub.rds") %>%
            filter(str_sub(geoid, 1,5) %in% mycounties)
# Import facebook neighborhoods
neighborhoods <- read_rds("fb_data/glass_fire_nodes_neighborhood.rds")

# Import key destinations
points <- bind_rows(
  data.frame(latitude = 37.7749, longitude = -122.4194,
             name = "San Francisco"),
  data.frame(latitude = 38.5616501, longitude = -121.5833423,
             name = "Sacramento"),
  data.frame(latitude = 38.4354662, longitude = -122.8439749,
             name = "Santa Rosa"),
  data.frame(latitude = 37.2965298, longitude = -122.0982949,
             name = "San Jose")) %>%
  st_as_sf(coords = c("longitude", "latitude"),
           crs = CRS(paste0("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs")))

# Import hydrology
water <- read_rds("shapes/ca_hydro_area.rds") %>%
  select(HYDROID, geometry)

g2 <- ggplot() +
  # Add a nice grey border to everything
  geom_sf(data = back,
          size = 5, fill = NA, color = "darkgrey") +
  geom_sf(data = back,
          size = 5, fill = "#9DBE76", color = NA) +
  # Add hydrology
  geom_sf(data = water, color = NA, fill = "steelblue", alpha = 0.5) +
  # Add lines for each city
  geom_sf(data = cities,
          fill = NA, color = "lightgrey", size = 0.5) +
  # Add county boundaries
  geom_sf(data = counties, color = "black", size = 1, fill = NA) +
  # Add points for neighborhoods
  geom_sf(data = neighborhoods,
          color = "#785EF0", alpha = 0.75, size = 2) +
    geom_sf(data = neighborhoods,
          color = "white", alpha = 0.75, size = 1.5) +
  # Overlay key cities as labels
  geom_sf_label(data = points, mapping = aes(label = name), size = 3) +
  # Make the theme
  theme_void(base_size = 14) + 
  theme(plot.subtitle = element_text(hjust = 0.5)) +
   labs(subtitle = "Glass Fire in Napa and Sonoma Counties\nFacebook Recorded Neighborhoods (n = 351)\nby County Subdivision (n = 181) and County (n = 32)") +
  # Add compass and scale
  ggspatial::annotation_scale(location = "bl", pad_x = unit(0.5, "in")) +
  ggspatial::annotation_north_arrow(
    which_north = "true",
    height = unit(0.25, "in"),
    width = unit(0.25, "in"),
    location = "bl")

g2 +
  ggsave("viz/ca_neighborhoods_and_states.png", dpi = 300, height = 6, width = 4)

ggpubr::ggarrange(g1,g2, ncol = 2) +
  theme(plot.margin = margin(0,0,0,0, unit = "cm")) +
  ggsave("viz/neighborhoods_and_states.png", dpi = 500, width = 10, height = 5)


```
```{r}
rm(list = ls())

```